## steps
# extract data on instrument type level
# import debtor dataset
# inner join between the two datasets to remove redundant debtors and import size flag + nace_code
# exclude only observations that do not have any amount reported

# final datasets
import pandas as pd
import pyodbc
import os

path_data = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final datasets'
if not os.path.isdir(path_data):
    os.makedirs(path_data)

# instrument level
# NULL values are ignored when summed

sql_conn = pyodbc.connect('DSN=DISC DP Impala 64bit',autocommit=True)

query = """

 SELECT DISTINCT cntry_dbtr, 
                 dbtr_id, 
                 nace_code,
                 typ_instr_corr,
                 ona_dbtr,
                 outst_amnt_byinstr,
                 prtctn_amnt_byinstr,
                 ona_intrate_c_ttl,
                 ona_orgnlmtrty_c_ttl,
                 ona_pddbtr_c_ttl,
                 w_intrate_c_ttl/ona_intrate_c_ttl AS wir_byloantype,
                 w_orgnlmtrty_c_ttl/ona_orgnlmtrty_c_ttl AS mtrty_byloantype,
                 w_pddbtr_c_ttl/ona_pddbtr_c_ttl AS pddbtr_byloantype
 
 FROM (
 SELECT          cntry_dbtr, 
                 dbtr_id, 
                 cntrct_id,
                 instrmnt_id,
                 typ_instr_corr,
                 nace_code,
                 otstndng_nmnl_amnt_cv_c,
                 prtctn_allctd_vl_inst_cv_c,
                 MAX(CASE WHEN default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = -99 THEN -99
                      WHEN (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = -99) OR
                           (default_aggr_dbtr_lvl = 0 AND default_dbtr_lvl = 0) OR
                           (default_aggr_dbtr_lvl = -99 AND default_dbtr_lvl = 0) THEN 0
                      ELSE 1 END) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS dflt_stts_debtor,
                 outst_amnt_byinstr,
                 prtctn_amnt_byinstr,
                 ona_dbtr,
                 SUM(ona_intrate_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS ona_intrate_c_ttl,
                 SUM(ona_orgnlmtrty_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS ona_orgnlmtrty_c_ttl,
                 SUM(ona_pddbtr_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS ona_pddbtr_c_ttl,
                 SUM(annlsd_agrd_rt_c*ona_intrate_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS w_intrate_c_ttl,
                 SUM(orgnl_mtrty_c_y*ona_orgnlmtrty_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS w_orgnlmtrty_c_ttl,
                 SUM(pd_dbtr_c*ona_pddbtr_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS w_pddbtr_c_ttl
                 
                 
 FROM (
 SELECT          cntry_dbtr,
                 dbtr_id, 
                 cntrct_id,
                 instrmnt_id,
                 typ_instr_corr,
                 t2.nace_code,
                 otstndng_nmnl_amnt_cv_c,
                 prtctn_allctd_vl_inst_cv_c,
                 CASE WHEN (annlsd_agrd_rt < 0 OR annlsd_agrd_rt > 0.3) THEN NULL ELSE annlsd_agrd_rt END AS annlsd_agrd_rt_c,
                 CASE WHEN (annlsd_agrd_rt < 0 OR annlsd_agrd_rt > 0.3) THEN NULL ELSE otstndng_nmnl_amnt_cv_c END AS ona_intrate_c,
                 CASE WHEN orgnl_mtrty_c/365 > 30 THEN 30 ELSE orgnl_mtrty_c/365 END AS orgnl_mtrty_c_y,
                 CASE WHEN orgnl_mtrty_c IS NULL THEN NULL ELSE otstndng_nmnl_amnt_cv_c END AS ona_orgnlmtrty_c,
                 CASE WHEN (pd_dbtr > 0 AND pd_dbtr <= 1) THEN pd_dbtr ELSE NULL END AS pd_dbtr_c,
                 CASE WHEN (pd_dbtr > 0 AND pd_dbtr <= 1) THEN otstndng_nmnl_amnt_cv_c ELSE NULL END AS ona_pddbtr_c,                
                 MAX(dflt_stts_instr_flag) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_aggr_dbtr_lvl,
                 MAX(dflt_stts_debtor_flag) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS default_dbtr_lvl,
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS outst_amnt_byinstr,
                 SUM(prtctn_allctd_vl_inst_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id, typ_instr_corr) AS prtctn_amnt_byinstr,
                 SUM(otstndng_nmnl_amnt_cv_c) OVER (PARTITION BY cntry_dbtr, dbtr_id) AS ona_dbtr
                 
                 

FROM            lab_org_dgr_fir.ac_instr_201912_kk t1
LEFT JOIN       lab_org_dgr_fir.ac_dbtr_201912_kk t2
ON              t1.dbtr_id=t2.entty_riad_id

WHERE           nvl(otstndng_nmnl_amnt_cv_c,0) + nvl(off_blnc_sht_amnt_cv,0) + nvl(cmmtmnt_incptn_cv,0) > 0                              
                AND prjct_fnnc_ln != 1
      
          ) tbl
 
      
                                ) tbl2
                                                
WHERE    dflt_stts_debtor = 0 
         and ona_dbtr IS NOT NULL

                                                
                """
                
dd_instr = pd.read_sql(query, sql_conn)
sql_conn.close()

# import final debtors dataset
dbtr_f = pd.read_stata(path_data + r'\dbtr_combined_dt.dta')

# merge datasets and take size flag
dd_merged = dd_instr.merge(dbtr_f[['cntry', 'entty_riad_id','sz_f']], how='left', left_on=['cntry_dbtr', 'dbtr_id'], right_on=['cntry', 'entty_riad_id'])

# final dataset
dd_final = dd_merged[dd_merged.sz_f.isna() == False].drop(columns=['cntry', 'entty_riad_id'])

# export dataset
dd_final.to_stata(path_data + '\instr_lvl_dt_f.dta', write_index=False)

